# ===============================================================================================================================
# ===============================================================================================================================
# Safegraph Analysis
# This code is for replicating: Figures 2, 3, and 6, Table 6,  Appendix Figure A1, and Appendix Table A12
# 
#   1. Import and Prep Safegraph data
#   2. Analysis
#   2. Tables (Generic and Custom)
#   3. Figures (Custom)
# ===============================================================================================================================
# ===============================================================================================================================

# ===============================================================================================================================
#Clear out the namespace
rm(list = ls(all.names = TRUE)) #will clear all objects includes hidden objects. (simpler is rm(list=ls()))
gc() #free up memrory and report the memory usage.
.pardefault <- par()


#Turn off scientific notation (http://datacornering.com/remove-scientific-notation-in-r/)
# options(scipen = 100)
options(scipen = 999)

# Set the path to the root folder ending in RFSDelivery 
cd <- "D:/..."
# Initialize all other paths and import all functions and packages from the AllSettings file.
source(paste0(cd,'/Code/R/AllSettings.R',collapse=''))

#Turn graphics off to allow saving the plots
graphics.off()
dyncoef_drop = "(-16|-15|-14|-13|-12|-11|-10|-9|13|14|15)"

cfolder <- ''



# ===============================================================================================================================
# ===============================================================================================================================
# Section 1: Import and prepare Safegraph data
# ===============================================================================================================================
# ===============================================================================================================================
ptf <- read_parquet(cdd['p_d_sg_pa'] %c% cfolder %c% '/safegraph_analysis_f3_mod04182022.parquet')

# Import the banking desert data
bd <- read_dta(cdd['p_d'] %c% '/Banking Deserts/commdata_revised.dta')
bd <- bd %>% rename(zip = zipcode) %>% mutate(desert = replace_na(desert, 0),
                                              altfin = replace_na(TotalAltCredit, 0),
                                              unemp = UNEMP_CY / TOTPOP_CY,
                                              nonwhite = 1 - (WHITE_CY/ TOTPOP_CY),
                                              banked = (replace_na(NCUA2014, 0) + replace_na(FDIC2014, 0)) / TOTPOP_CY
)

ptf <- left_join(ptf,bd %>% select(zip,unemp,nonwhite,altfin,banked),by='zip')
ptf <- ptf %>% mutate(
  unemp_2g = 1*(unemp > median(ptf[['unemp']],na.rm=TRUE)),
  nonwhite_2g = 1*(nonwhite > median(ptf[['nonwhite']],na.rm=TRUE)),
  altfin_2g = 1*(altfin > median(ptf[['altfin']],na.rm=TRUE)),
  banked_2g = 1*(banked > median(ptf[['banked']],na.rm=TRUE))
)

# # # Filter the data
ptf <- ptf %>% filter((etimeq<8)&(etimeq>=-8))
ptf <- ptf %>% group_by(id) %>% mutate(drop= 1*((treated==0)&(distance_t2cf_rank>5)&(naics %in% c(722511,722513,621111,621210,722515,447110))&(max(randomrank_ind)>0.5))) %>% ungroup()


winsor <- 0.01
ptf <- ptf %>% mutate(
                      visitsn = visits * nfactor,
                      visitorsn = visitors * nfactor,
                      visitsn_log = log(visits * nfactor),
                      visitorsn_log = log(visitors * nfactor),
                      d50_ic50_n1=1*((distance<5000)&(ic>5000)&(neg1present==1)),
                      d50_ic50_n1_n4top8=1*((distance<5000)&(ic>5000)&(etimeq>=-4)&(etimeq<=8))*(neg1present==1),
                      d50_ic50_n1_n4top8_PreCovid=1*((distance<5000)&(ic>5000)&(etimeq>=-4)&(etimeq<=8))*(neg1present==1)*(date < '2020-02-01'),
                      d50_ic50_0=1-d50_ic50,
                      etimey = floor(etimeq/4),
                      d50_ic50_PreCovid = d50_ic50*(date < '2020-02-01'),
                      d50_ic50_PreCovid_n4top8 = d50_ic50*(date < '2020-02-01')*(etimeq>=-4)*(etimeq<=8),
                      d50_ic50_PreCovid_n4top4 = d50_ic50*(date < '2020-02-01')*(etimeq>=-4)*(etimeq<=4),
                      post=1*(etime>=0),
                      na=1
                    )

ptf <- ptf %>% rename(pop = population,
                      popc = population_coh,
                      popcf = population_cohcf)



# Variable labels
labels_visits = list(
  visits='Visits (Raw)',visitors='Visitors (Raw)',nfactor='Norm Factor',
  visitsn_log='Visits Log',visitorsn_log='Visitors Log',
  opening='Opening'
)
labels_coh = list(treated='Treated',etime='Event Time M',etimeq='Event Time Q',etimey='Event Time Y',post='Post',
                  distance='Dist',ic='IC',distance_t2cf='Dist of Target PC to CF PC',date='Date',
                  ctype='Counterfactual type',cohort='PC Cohort',temp_inter='')
labels_indiv = list(sgid='Est',id='Est',zip='Zip Code',state='State',naics='Industry',topcat='Ind Top',subcat="Ind Sub",polyc='PolyC',sqft='SqFt',parking='Parking',synth="Synth")
labels_sample = list(
  restaurant='Restaurant',health='Health',retail='Retail',religious='Religious',luxury='Luxury',
  auto='Auto Repair',grocery='Grocery',school='School',grooming='Grooming',entertainmentf='Entertainment (F)',
  entertainment='Entertainment',clothing='Clothing',vice='Vice',gas='Gas',gym='Gym',
  car='Auto Dealer',bank='Bank',mall='Mall',hotel='Hotel',nonbank='NonBank',
  police='Police',death='Death',liquor='Liquor',pawn='Pawn',payday='Payday',
  autotitle='Autotitle',altcredit='HC Credit',bloodbank='BloodBank',na='All Est')
labels_treat = list(d50_ic50='(D<5)(IC>5)',na='All')
labels_other = list(pop2sqkm_wq10='Density')
labels_all <- c(labels_visits, labels_coh, labels_indiv, labels_other, labels_treat)

# Apply variable labels
labels_sub <- labels_all[names(labels_all) %in% names(ptf)]
var_label(ptf) <- labels_sub

#Set the labels used for all tables
my.style.df = style.df(fixef.title = "", fixef.suffix = " FE", stats.title = "", slopes.title = "")
my.style.tex = style.tex(main="base", fixef.suffix = " FE", slopes.title = "",
                         tablefoot = FALSE,line.bottom="\\bottomrule",line.top="\\toprule",
                         depvar.title = "",var.title="\\midrule", fixef.title = '\\midrule', stats.title = '\\midrule')
setFixest_etable(digits = "r4", digits.stats = "r4", style.df = my.style.df, style.tex = my.style.tex, fitstat = c('n','r2'))
setFixest_dict(unlist(labels_all))





# ===============================================================================================================================
# ===============================================================================================================================
# SECTION 2: Visit Analysis
# ===============================================================================================================================
# ===============================================================================================================================

###################set up regressions we want to loop through########################## 
spec_task = c('est','tbl_sum')
spec_y = list(
  'vrnlog'=c('visitsn_log','visitorsn_log')
)  
spec_i = list(
  'TP'  = 'i(treated,post,0)', #used in Tables 6 and A12
  'Ttq' = 'i(etimeq, treated, ref = -1)' #used in figure 6
)
spec_fe = list(
  'ci_ctpd10_sd'='cohort^id + cohort^etime^pop2sqkm_wq10 + state^date'
)
spec_weight = c(
  'na'
)
#Dictionary to handle the splits by defining the variable and value labels and the base category (for marginal effects)
spec_split = list(
  'naics_g'=list('var'='naics_g','var_label'='Narrow Industry'),
  'naics_sg'=list('var'='naics_sg','var_label'='Broad Industry')
) 
spec_split_m = c('s')  #'na','s','is','ig'
interaction_types = list('is'='specific','ig'='generic')
spec_treat_rest <- c(
  'd50_ic50_n1',
  'd50_ic50_n1_n4top8',
  'd50_ic50_n1_n4top8_PreCovid'
)
spec_cluster <- list(
  'coh'='cohort'
)
spec_lean <- TRUE  #TRUE means small files or FALSE for large files.
scriptv <- 'v8'
spec_suf <- '_ds(ptf)'


##############run regressions specified above and save output################
for (var_tr in spec_treat_rest) {
  for (var_s in names(spec_split)){
    for (var_spm in spec_split_m) {
      for (var_i in names(spec_i)){
        for (var_w in spec_weight){
          for (var_c in names(spec_cluster)){
            for (var_fe in names(spec_fe)){
              if ((var_s == 'na')&(var_spm!='na')){ next }
              if ((var_s != 'na')&(var_spm=='na')){ next }
              if ((var_s != 'na')&(var_spm!='na')){ 
                if (!(spec_split[[var_s]][['var']] %in% names(ptf))) { next }
              }
              
              # Create folder for model, tables, figures
              specl <- ifelse(spec_lean, 'L', 'F')
              spec_ifes <- 'i(' %c% var_i %c% ')_fe('%c% var_fe %c% ')_c('%c% var_c %c%')_w('%c% var_w %c%')_s('%c% var_s %c%')_sm(' %c% var_spm %c% ')' %c% '_trl(' %c% var_tr %c% ')_l(' %c% specl %c% ')_' %c% scriptv %c% spec_suf
              print(spec_ifes)
              mod <- 'DDiD_' %c% spec_ifes %c% '.Rqs'
              models <- list.files(path = cdd['p_rm_sg'] %c% cfolder,pattern='\\.Rqs')
              filem <- cdd['p_rm_sg']  %c% cfolder %c% '/' %c% mod
              folder_rt <- cdd['p_rt'] %c% '/Safegraph/' %c% cfolder %c% spec_ifes
              folder_rf <- cdd['p_rf'] %c% '/Safegraph/' %c% cfolder %c% spec_ifes
              folder_rta <- cdd['p_rt'] %c% '/Safegraph' %c% cfolder %c% '/All'
              folder_rfa <- cdd['p_rf'] %c% '/Safegraph' %c% cfolder %c% '/All'
              
              var_y <- unlist(spec_y)
              var_y <- var_y[var_y %in% names(ptf)]
              var_y <- 'c(' %c% paste0(var_y,collapse=', ') %c% ') '
              # print(3)
              if ('est' %in% spec_task){
                res <- my_feols(
                  df=ptf %>% filter(get(var_tr)==1),
                  labels=labels_sub,
                  form=list('y'=var_y,
                            'i'=spec_i[[var_i]],
                            'fe'=spec_fe[[var_fe]],
                            'cluster'=spec_cluster[[var_c]],
                            's'=spec_split[[var_s]],
                            'spm'=var_spm,
                            'weight'=var_w),
                  filem=filem,
                  lean=spec_lean
                )
                if (res$rc=='error'){
                  print('estimation error')
                  next
                }
                est <- res$est
              }
              
              
              if ('tbl_sum' %in% spec_task){
                tbl_all <- my_etable_df(est)
                openxlsx::write.xlsx(as_tibble(tbl_all), folder_rta %c% '/DDiD_' %c% spec_ifes %c% '.xlsx', row.names=FALSE, overwrite=TRUE)
              }
            }
          }
        }
      }
    }
  }
}




# ===============================================================================================================================
# ===============================================================================================================================
# Section 3: Tables
# ===============================================================================================================================
# ===============================================================================================================================
ofolder <- cdd['p_rt_sg'] %c% cfolder %c% '/Custom'

labels_all[['temp_inter']]='Est. Type'

#Set the labels used for all tables
my.style.df = style.df(fixef.title = "", fixef.suffix = " FE", stats.title = "", slopes.title = "")
my.style.tex = style.tex(main="base", fixef.suffix = " FE", slopes.title = "",
                         tablefoot = FALSE,line.bottom="\\bottomrule",line.top="\\toprule",
                         depvar.title = "",var.title="\\midrule", fixef.title = '\\midrule', stats.title = '\\midrule')
setFixest_etable(digits = "r4", digits.stats = "r4", style.df = my.style.df, style.tex = my.style.tex, fitstat = c('n','r2'))
setFixest_dict(unlist(labels_all))


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Table 6: Effect of plasma openings on establishment-level foot traffic
estd <- qread(cdd['p_rm_sg'] %c% cfolder %c% '/DDiD_i(TP)_fe(ci_ctpd10_sd)_c(coh)_w(na)_s(naics_g)_sm(s)_trl(d50_ic50n_n1_n4top8)_l(L)_v8_ds(ptf).Rqs')
estd_pc <- qread(cdd['p_rm_sg'] %c% cfolder %c% '/DDiD_i(TP)_fe(ci_ctpd10_sd)_c(coh)_w(na)_s(naics_g)_sm(s)_trl(d50_ic50_n1_n4top8_PreCovid)_l(L)_v8_ds(ptf).Rqs')
est_ext1 <- estd[lhs='visitsn_log', sample=c('^Grocery','^School')]
est_ext2 <- estd_pc[lhs='visitsn_log', sample=c('^Grocery')]
est_ext_tbl <- my_etable_df(c(as.list(est_ext1),as.list(est_ext2)))
tl <- c(as.list(est_ext1),as.list(est_ext2))[c(1,3,2)]
my_etable(tl, file= ofolder %c% '/Table 6: DDiD_Summary_Visits_log_20231129_d50_ic50_n1_n4top8.tex')

rm(estd)
rm(estd_pc)
rm(est_ext1)
rm(est_ext2)
gc()
mem_used()



#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Table A12: Effect of plasma openings on establishment-level foot traffic
estd <- qread(cdd['p_rm_sg'] %c% cfolder %c% '/DDiD_i(TP)_fe(ci_ctpd10_sd)_c(coh)_w(na)_s(naics_g)_sm(s)_trl(d50_ic50n_n1_n4top8)_l(L)_v8_ds(ptf).Rqs')
est_ext1 <- estd[lhs='visitsn_log', sample=c('^Gas','^Grocery','^Entertainment','^Liquor')]
est_ext2 <- estd[lhs='visitorsn_log', sample=c('^Gas','^Grocery','^Entertainment','^Liquor')]
est_ext_tbl <- my_etable_df(c(as.list(est_ext1),as.list(est_ext2)))
tl <- c(as.list(est_ext1),as.list(est_ext2))[c(1,2,3,4,5,6,7,8,9)]
my_etable(tl, file= ofolder %c% '/Table A12: DDiD_Summary_Visits_log_20231129_d50_ic50_n1_n4top8.tex')

rm(estd)
rm(est_ext1)
rm(est_ext2)
gc()
mem_used()



# ===============================================================================================================================
# ===============================================================================================================================
# Section 4: Figures
# ===============================================================================================================================
# ===============================================================================================================================
ofolder <- cdd['p_rf_sg'] %c% cfolder %c% 'Custom'

#Set the labels used for all tables
my.style.df = style.df(fixef.title = "", fixef.suffix = " FE", stats.title = "", slopes.title = "")
my.style.tex = style.tex(main="base", fixef.suffix = " FE", slopes.title = "",
                         tablefoot = FALSE,line.bottom="\\bottomrule",line.top="\\toprule",
                         depvar.title = "",var.title="\\midrule", fixef.title = '\\midrule', stats.title = '\\midrule')
setFixest_etable(digits = "r4", digits.stats = "r4", style.df = my.style.df, style.tex = my.style.tex, fitstat = c('n','r2'))
setFixest_dict(unlist(labels_all))


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Figure 2: Distinct visitors to plasma storefronts, SafeGraph data, monthly rates
vis <- read_parquet(cdd['p_d_sg'] %c% '/ProximityAnalysis/prox_cbgv_cdf2.parquet')
vis <- vis %>% mutate(distance_e2o = dist_bins/10, mean=100*mean, p25 = 100*p25, p50 = 100*p50, p75 = 100*p75)
avis <- vis %>% select(c(distance_e2o,mean,p25,p75)) %>% melt(id.vars = "distance_e2o")
g1 <- ggplot(data=avis, aes(x=distance_e2o, y=value, color=variable)) +
  theme_minimal() + theme(legend.position = "bottom", legend.title=element_blank()) +
  geom_line() +
  scale_color_grey() + theme_classic() +
  labs(x="Distance (km)",y="%") +
  coord_cartesian(xlim =c(0,20), ylim = c(0,100)) 
ggsave(ofolder %c% '/Figure 2: panel A: Proximity_cdf_simp.png',plot=g1,dpi=1200)

rm(vis)
rm(avis)
gc()
mem_used()


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Figure 3: SafeGraph-tracked monthly visits to plasma centers during the COVID-19 Pandemic
ptm <- read_parquet(cdd['p_d_sg_pva'] %c% '/ptm_aug_PlasmaEst.parquet')
ptm <- ptm %>% mutate(dt_txt=format(dt, "%b-%y"))
ptms <- ptm %>% filter(etime>24) %>% filter((dtm<'2021-06-01')&(dtm>='2019-01-01')) %>% 
  group_by(dtm, dt_txt) %>% reframe(enframe(quantile(visitsn, c(0.25, 0.5, 0.75)), "quantile", "visitsn"))
gg <- ggplot(ptms, aes(dtm, visitsn)) + 
  geom_line(aes(color=quantile),size=1)  +  
  geom_vline(xintercept=as.Date("2020-04-14"),color='green4',linetype='dashed', size=0.75) +
  geom_vline(xintercept=as.Date("2021-01-04"),color='green4',linetype='dashed', size=0.75) +
  geom_vline(xintercept=as.Date("2021-03-17"),color='green4',linetype='dashed', size=0.75) +
  scale_x_date(labels = function(z) format(z, format = "%b-%y")) +
  theme_minimal(base_size = 20) +
  theme(legend.title=element_blank()) +
  geom_hline(yintercept = 0, color = "black", size=1) +
  theme(
    axis.text.x = element_text(face="plain", color='black'),
    axis.text.y = element_text(face="plain", color='black'),
    legend.text = element_text(face="plain", color='black')) +
  labs(x='Event Time',y='Visits') + 
  theme(legend.position = "bottom") +
  scale_color_jco()
ggsave(filename='Figure 3: StimulusChecks_Visits_log_IQR.png',plot=gg,path = cdd['p_rf_sg'],dpi=120,width=16,height=9)

rm(ptm)
rm(ptms)
gc()
mem_used()


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Figure 6: Effect of plasma openings on establishment-level foot traffic
estd <- qread(cdd['p_rm_sg'] %c% cfolder %c% '/DDiD_i(Ttq)_fe(ci_ctpd10_sd)_c(coh)_w(na)_s(naics_g)_sm(s)_trl(d50_ic50_n1)_l(L)_v8_ds(ptf).Rqs')
estb <- qread(cdd['p_rm_sg'] %c% cfolder %c% '/DDiD_i(Ttq)_fe(ci_ctpd10_sd)_c(coh)_w(na)_s(naics_sg)_sm(s)_trl(d50_ic50_n1)_l(L)_v8_ds(ptf).Rqs')
lhsvars = c('visitsn_log')
mod = '_log_20231129_d50_ic50_n1'
#Essential Foot-traffic
gg <- my_iplot(estb[lhs=lhsvars, sample=c('^Essential')],labels=labels_all,file=ofolder %c% '/Figure 6: Panel A: Pane 1 - DDiD_Essentials'%c% mod %c% '.png')
gg <- my_iplot(estd[lhs=lhsvars, sample=c('^Gas')],labels=labels_all,file=ofolder %c% '/Figure 6: Panel A: Pane 2 - DDiD_Gas'%c% mod %c% '.png')
gg <- my_iplot(estd[lhs=lhsvars, sample=c('^Grocery')],labels=labels_all,file=ofolder %c% '/Figure 6: Panel A: Pane 3 - DDiD_Grocery'%c% mod %c% '.png')
#Non-essential Foot-traffic
gg <- my_iplot(estb[lhs=lhsvars, sample=c('^Non-Essential')],labels=labels_all,file=ofolder %c% '/Figure 6: Panel B: Pane 1 - DDiD_NonEssentials'%c% mod %c% '.png')
gg <- my_iplot(estd[lhs=lhsvars, sample=c('^Entertainment')],labels=labels_all,file=ofolder %c% '/Figure 6: Panel B: Pane 2 -DDiD_Entertainment'%c% mod %c% '.png')
gg <- my_iplot(estd[lhs=lhsvars, sample=c('^Restaurant')],labels=labels_all,file=ofolder %c% '/Figure 6: Panel B: Pane 3 - DDiD_Restaurant'%c% mod %c% '.png')
#Placebo Foot-traffic
gg <- my_iplot(estd[lhs=lhsvars, sample=c('^School')],labels=labels_all,file=ofolder %c% '/Figure 6: Panel c: - DDiD_School'%c% mod %c% '.png')
rm(estd)
rm(estb)
gc()
mem_used()


#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Appendix Figure A1: Average visits to newly opened plasma centers (first-stage effect)
ptm <- read_parquet(cdd['p_d_sg_pva'] %c% '/ptm_aug_PlasmaEst.parquet')
ptm <- ptm %>% mutate(dt_txt=format(dt, "%b-%y"))
ptms <- ptm %>% filter(dtm<'2020-02-01') %>% group_by(etime) %>% summarise(visitsn_m = mean(visitsn), visitsn_s = (var(visitsn) / n())^0.5) %>% ungroup()
ptms <- ptms %>% mutate(visitsn_m_l = visitsn_m - visitsn_s*1.96, visitsn_m_u = visitsn_m + visitsn_s*1.96)
gg <- ggplot(ptms %>% filter((etime>=0)&(etime<=24)), aes(x=etime, y=visitsn_m)) +  
  geom_errorbar(aes(ymin=visitsn_m_l, ymax=visitsn_m_u), width=.1) +
  geom_line(size=1) + 
  geom_point(size=1) +
  theme_minimal(base_size = 20) +
  theme_minimal(base_size = 20) +
  theme(legend.title=element_blank()) +
  geom_hline(yintercept = 0, color = "black", size=1) +
  theme(
    axis.text.x = element_text(face="plain", color='black'),
    axis.text.y = element_text(face="plain", color='black'),
    legend.text = element_text(face="plain", color='black')) +
  labs(x='Event Time',y='Visits') + 
  scale_color_jco()
ggsave(filename='Figure A1: Plasma_Visits_avg_ci.png',plot=p,path = cdd['p_rf_sg'],dpi=120,width=16,height=9)

rm(ptm)
rm(ptms)
gc()
mem_used()




